This script reads in both PBMC and Liver Seurat objects, and generates select figures used in the manuscript.
Load libraries
library(Seurat)
library(scClustViz)
library(ggplot2)
library(dplyr)
library(rcartocolor)
library(SeuratWrappers)
library(scCustomize)
scCustomize v1.1.1
If you find the scCustomize useful please cite.
See 'samuel-marsh.github.io/scCustomize/articles/FAQ.html' for citation info.
Read in liver map
# Liver map: read the Seurat object, tag this run as liver, and make the
# chosen clustering resolution the active identity.
sobj <- readRDS(
  "~/Dropbox/Zoe/scf_version/analysis/healthy_sc/seurat_objects/dropletQC_filtered/allIntegrated_cca_kanchor5_noBiopsyHeps_dropletQCFiltered.RDS"
)
tissue <- "liver"
res <- "integrated_snn_res.1.4"
Idents(sobj) <- res
Read in PBMC map
# PBMC map: the .RData file presumably provides `scSeurat` (it is used right
# after the load). This reassigns `sobj`, `tissue` and `res`.
load(
  "~/Dropbox/Zoe/scf_version/analysis/healthy_sc/seurat_objects/no_dropletQC/integrated_PBMC_cca_kanchor5_scClustViz.RData"
)
sobj <- scSeurat
tissue <- "PBMC"
res <- "integrated_snn_res.0.6"
Idents(sobj) <- res
UMAP with no cluster numbers
# UMAP of the active identities with neither on-plot labels nor a legend.
plot <- DimPlot(sobj, label = FALSE) & NoLegend()
print(plot)
# Draw the same plot into a PDF under ./figures/<tissue>/.
pdf(paste0("./figures/", tissue, "/", tissue, "_UMAP_clusters_noLabels.pdf"))
print(plot)
dev.off()
png
2
UMAP with cluster numbers
# UMAP of the active identities with the cluster labels drawn on the plot.
plot <- DimPlot(sobj, label = TRUE)
print(plot)
# Draw the same plot into a PDF under ./figures/<tissue>/.
pdf(paste0("./figures/", tissue, "/", tissue, "_UMAP_clusters_labels.pdf"))
print(plot)
dev.off()
png
2
Map with SCINA-generated cell-type labels
# UMAP grouped by the `scina_labels_refined` metadata column; labels are drawn
# on the plot and the legend is removed. Displayed only, not saved.
DimPlot(
  sobj,
  group.by = "scina_labels_refined",
  label = TRUE
) & NoLegend()
Map with general cell-type labels for paper
# UMAP grouped by `general_cell_labels`, with repelled on-plot labels and no
# plot title.
plot <- DimPlot(
  sobj,
  group.by = "general_cell_labels",
  label = TRUE,
  repel = TRUE
) +
  ggtitle(NULL)
print(plot)
# Wider-than-tall PDF of the same plot.
pdf(
  paste0("./figures/", tissue, "/", tissue, "_UMAP_general_cell_labels.pdf"),
  height = 8,
  width = 12
)
print(plot)
dev.off()
png
2
Map grouping by general cell type labels but with no labels on plot
# Same grouping as the labelled map, but with no title, labels or legend.
# `+` binds tighter than `&`, so NoLegend() is applied after the title is
# removed; the parentheses only make that explicit.
plot <- (DimPlot(sobj, group.by = "general_cell_labels") + ggtitle(NULL)) &
  NoLegend()
print(plot)
pdf(
  paste0("./figures/", tissue, "/", tissue,
         "_UMAP_general_cell_labels_noLabels.pdf")
)
print(plot)
dev.off()
png
2
Map with original identities
# UMAP coloured by `orig.ident`, using one colour from the carto "Safe"
# palette per distinct orig.ident value.
n_orig_idents <- nlevels(as.factor(sobj$orig.ident))
plot <- DimPlot(
  sobj,
  group.by = "orig.ident",
  cols = carto_pal(n_orig_idents, "Safe")
) +
  ggtitle(NULL)
print(plot)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_UMAP_orig_idents.pdf"),
  height = 5,
  width = 7
)
print(plot)
dev.off()
png
2
Barplot with original identities on a cluster-level grouping:
# Stacked bar plots of orig.ident composition for each cluster at the
# resolution named by `res`.

# Cell-level metadata to plot
df <- sobj@meta.data
# Store the active identities in the cluster column as a factor whose levels
# are in numeric order (so cluster 10 sorts after 9, not after 1). The column
# is addressed by name instead of by position.
df[[res]] <- factor(
  Idents(sobj),
  levels = sort(as.numeric(levels(Idents(sobj))))
)
# Basic plot of clusters by replicate. `.data[[res]]` looks the column up by
# its string name; the explicit x label avoids an axis title that shows the
# lookup expression itself.
ggplot(df, aes(x = .data[[res]], fill = orig.ident)) +
  geom_bar() +
  xlab("Cluster") +
  theme(axis.text = element_text(size = 7))
# Plot as proportion or percentage of cluster
ggplot(df, aes(x = .data[[res]], fill = orig.ident)) +
  geom_bar(position = "fill") +
  xlab("Cluster") +
  theme(axis.text = element_text(size = 7))
Barplot with original identities grouped by general cell labels:
# Number of cells per general cell label, stacked by orig.ident.
df <- sobj@meta.data
plot1 <- ggplot(df, aes(x = general_cell_labels, fill = orig.ident)) +
  geom_bar() +
  scale_fill_carto_d(name = NULL, palette = "Safe") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    # Rotate the x tick labels so long cell-type names fit.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank()
  ) +
  ylab("Number of cells")
print(plot1)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_barplot_orig_ident_counts.pdf")
)
print(plot1)
dev.off()
png
2
# Same bar plot as the count version, but each bar is scaled to 1
# (position = "fill") so it shows the orig.ident proportions per cell label.
# Uses the `df` metadata table created for the count plot.
plot2 <- ggplot(df, aes(x = general_cell_labels, fill = orig.ident)) +
  geom_bar(position = "fill") +
  scale_fill_carto_d(name = NULL, palette = "Safe") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank()
  ) +
  ylab("Proportion of cells")
print(plot2)
pdf(
  paste0("./figures/", tissue, "/", tissue,
         "_barplot_orig_ident_proportions.pdf")
)
print(plot2)
dev.off()
png
2
Generate dotplot with specific markers
# Dot plot of selected marker genes from the SCT assay, grouped by the active
# identities. The title is built from `tissue` so it names the map actually
# loaded instead of always saying "liver".
DotPlot(
  sobj,
  assay = "SCT",
  features = c(
    "PTPRC", "CALCRL", "NKG7", "CD3E", "MARCO",
    "LYZ-1", "CD19", "MS4A1", "STAB2"
  )
) +
  ggtitle(paste("Select features for", tissue, "map"))
Calculate markers for general cell labels then reset resolution
# Switch the active identity to the general cell labels, then compute
# positive-only markers for each label with RunPrestoAll().
Idents(sobj) <- "general_cell_labels"
sobj_markers <- RunPrestoAll(
  sobj,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
Calculating cluster CD3+ Tissue-resident memory T cells
Calculating cluster CD14-/CD16+ Non-classical monocytes
Calculating cluster CD14+/CD16+ Monocytes
Calculating cluster Endothelial
Calculating cluster CD3+ NK-like cells
Calculating cluster Macrophages
Calculating cluster CD3+ Regulatory T cells
Calculating cluster B cells
Calculating cluster Hepatocytes
Calculating cluster Cholangiocytes
Calculating cluster Antibody-secreting B cells
Calculating cluster Mesenchymal
Calculating cluster Doublets/Contamination
Calculating cluster CD3+ Proliferating T cells
Calculating cluster cDC1 Conventional DC type 1
# Print the two markers with the highest avg_log2FC for each cluster.
sobj_markers %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 2) %>%
  print()
# Restore the cluster resolution as the active identity.
Idents(sobj) <- res
Save markers
# Write the marker table as a tab-separated file with a header row and no
# row names or quoting.
groups <- "general_cell_labels"
marker_file <- paste0(
  "./figures/", tissue, "/", tissue, "_markers_", groups, ".tsv"
)
write.table(
  sobj_markers,
  file = marker_file,
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)
Generate heatmap with top 5 markers grouping by general cell types
# Heatmap of the top 5 markers (by avg_log2FC) per general cell label.

# Remove mikado genes from the marker list. Filter on the `gene` column (the
# same column that supplies the heatmap features below) rather than on row
# names, so the filter does not depend on how the row names were generated.
sobj_markers <- sobj_markers[!grepl("mikado", sobj_markers$gene), ]
top <- sobj_markers %>%
  group_by(cluster) %>%
  top_n(n = 5, wt = avg_log2FC)
# If liver, select fewer cells. The seed makes the random subset (and so the
# figure) reproducible between runs, and the size is capped at the number of
# cells available so sample() cannot fail on a smaller object.
max_heatmap_cells <- 30000
if (tissue == "liver") {
  set.seed(42)
  cells <- sample(colnames(sobj), size = min(max_heatmap_cells, ncol(sobj)))
} else {
  # PBMC (and any other map): use every cell, so `cells` is always defined.
  cells <- colnames(sobj)
}
DoHeatmap(
  sobj,
  features = top$gene,
  group.by = "general_cell_labels",
  size = 3,
  angle = 90,
  cells = cells
) +
  NoLegend() +
  theme(text = element_text(size = 7))
Warning: The following features were omitted as they were not found in the scale.data slot for the SCT assay: H2AZ2;H2AZ1
Make PDF of heatmap
# Open the PDF device and draw the marker heatmap into it, using smaller
# group labels than the on-screen version. The device is closed by the
# dev.off() call that follows this chunk.
groups <- "general_cell_labels"
pdf(
  paste0("./figures/", tissue, "/", tissue, "_heatmap_", groups, ".pdf"),
  height = 11,
  width = 7
)
print(
  DoHeatmap(
    sobj,
    features = top$gene,
    group.by = "general_cell_labels",
    size = 2,
    angle = 90,
    cells = cells
  ) +
    NoLegend() +
    theme(text = element_text(size = 7))
)
Warning: The following features were omitted as they were not found in the scale.data slot for the SCT assay: H2AZ2;H2AZ1
dev.off()
null device
1
Make specific plots with specific genes. The genes we are interested in include: PTPRC, CALCRL, NKG7, CD3E, MARCO, LYZ, CD19, MS4A1, STAB2, ALB, CD4, CD8A, CLEC4G, CD5L, C1QB, ACTA2, VWF, IGLL5, CD68. Can also plot in italics.
# Feature plot for one gene, titled two ways: plain text and italic symbol.
geneCode <- "sct_LYZ-1" # Woodchuck-specific nomenclature for this genome
gene <- "LYZ"
mapType <- "Liver"
# Plain title of the form "<gene> - <mapType> map".
plain_title <- paste(gene, "-", mapType, "map")
FeaturePlot(sobj, features = geneCode) +
  ggtitle(plain_title)
# bquote() substitutes the value of `gene` into an italic plotmath title.
FeaturePlot(sobj, features = geneCode) +
  ggtitle(bquote(~italic(.(gene))))
Another version of the feature plot that outputs genes in italics
# Feature plots with italic gene-symbol titles, one PDF per gene.
# `geneCodes` holds the feature keys passed to FeaturePlot() and `genes` the
# matching symbols used for plot titles and file names; the two vectors are
# parallel (same length, same order).
if (tissue == "PBMC") {
  geneCodes <- c("sct_PTPRC", "sct_NKG7", "sct_CD14",
                 "sct_CD3E", "sct_MARCO", "sct_LYZ-1",
                 "sct_CD19", "sct_MS4A1", "sct_STAB2",
                 "sct_CD4", "sct_CD8A", "sct_XCL1;XCL2",
                 "sct_CD5L", "sct_C1QB", "sct_LEF1",
                 "sct_ACTA2", "sct_VWF", "sct_IGLL5-1",
                 "sct_CD68", "sct_FCGR3A;FCGR3B", "sct_TOP2A")
  genes <- c("PTPRC", "NKG7", "CD14", "CD3E", "MARCO", "LYZ",
             "CD19", "MS4A1", "STAB2", "CD4", "CD8A", "XCL1;XCL2",
             "CD5L", "C1QB", "LEF1", "ACTA2", "VWF", "IGLL5", "CD68",
             "FCGR3A;FCGR3B", "TOP2A")
} else if (tissue == "liver") {
  geneCodes <- c("sct_Ptprc", "sct_CALCRL", "sct_NKG7",
                 "sct_CD3E", "sct_MARCO", "sct_LYZ-1",
                 "sct_CD19", "sct_MS4A1", "sct_STAB2",
                 "sct_ALB-1", "sct_CD4", "sct_CD8A",
                 "sct_CLEC4G", "sct_CD5L", "sct_C1QB",
                 "sct_ACTA2", "sct_VWF", "sct_IGLL5-1",
                 "sct_CD68", "sct_XCL1;XCL2", "sct_LEF1",
                 "sct_RSPO3", "sct_MECOM")
  genes <- c("PTPRC", "CALCRL", "NKG7", "CD3E", "MARCO", "LYZ",
             "CD19", "MS4A1", "STAB2", "ALB", "CD4", "CD8A",
             "CLEC4G", "CD5L", "C1QB", "ACTA2", "VWF", "IGLL5", "CD68",
             "XCL1;XCL2", "LEF1", "RSPO3", "MECOM")
} else {
  # Fail clearly instead of reusing gene lists left over from another chunk.
  stop("No gene list defined for tissue: ", tissue, call. = FALSE)
}
# A length mismatch would pair features with the wrong title and file name.
stopifnot(length(geneCodes) == length(genes))
# Start at the first entry so that PTPRC is plotted too; seq_along() is also
# safe for an empty vector.
for (num in seq_along(geneCodes)) {
  plot <- FeaturePlot(sobj, features = geneCodes[num]) +
    ggtitle(bquote(~italic(.(genes[num]))))
  print(plot)
  pdf(paste0("./figures/", tissue, "/", tissue, "_", genes[num], "_UMAP.pdf"))
  print(plot)
  dev.off()
}
Plot marker genes for specific cell populations
Endothelial cells:
# Endothelial markers: one FeaturePlot_scCustom() plot and one PDF per gene.
# `geneCodes` are the features plotted; `genes` are the parallel names used
# for the italic title and the output file name.
geneCodes <- c(
  "STAB2", "ITGA1", "CD55", "LYVE1",
  "CD34", "VWF", "IFITM3;IFITM2;IFITM1", "RSPO3",
  "MECOM", "Mecom", "CALCRL", "LOC114089654",
  "RAMP2", "BST2", "CLEC4G", "CTSV;CTSL",
  "STAB1", "PLAC9", "PECAM1", "WNT2",
  "MYL6", "NR2F2"
)
genes <- c(
  "STAB2", "ITGA1", "CD55", "LYVE1",
  "CD34", "VWF", "IFITM3", "RSPO3",
  "MECOM", "Mecom", "CALCRL", "LOC114089654",
  "RAMP2", "BST2", "CLEC4G", "CTSV;CTSL",
  "STAB1", "PLAC9", "PECAM1", "WNT2",
  "MYL6", "NR2F2"
)
for (i in seq_along(geneCodes)) {
  plot <- FeaturePlot_scCustom(
    sobj,
    features = geneCodes[i],
    colors_use = viridis_light_high
  ) +
    ggtitle(bquote(~italic(.(genes[i]))))
  print(plot)
  pdf(paste0("./figures/", tissue, "/", tissue, "_", genes[i], "_UMAP.pdf"))
  print(plot)
  dev.off()
}
Lymphocytes:
Lymphocytes weird genes:
Mesenchyme:
# Mesenchymal markers: one FeaturePlot_scCustom() plot and one PDF per gene.
# Feature names and display names are identical for this set.
geneCodes <- c(
  "HHIP", "COL1A2", "COL3A1", "IGFBP7",
  "IGFBP3", "DCN", "COL1A1", "SPARC",
  "RBP1", "CALCRL"
)
genes <- c(
  "HHIP", "COL1A2", "COL3A1", "IGFBP7",
  "IGFBP3", "DCN", "COL1A1", "SPARC",
  "RBP1", "CALCRL"
)
for (i in seq_along(geneCodes)) {
  plot <- FeaturePlot_scCustom(
    sobj,
    features = geneCodes[i],
    colors_use = viridis_light_high
  ) +
    ggtitle(bquote(~italic(.(genes[i]))))
  print(plot)
  pdf(paste0("./figures/", tissue, "/", tissue, "_", genes[i], "_UMAP.pdf"))
  print(plot)
  dev.off()
}
Cholangiocytes:
# Cholangiocyte markers: one FeaturePlot_scCustom() plot and one PDF per gene.
geneCodes <- c("KRT19", "CFTR", "EPCAM", "SLC4A4")
genes <- c("KRT19", "CFTR", "EPCAM", "SLC4A4")
for (i in seq_along(geneCodes)) {
  plot <- FeaturePlot_scCustom(
    sobj,
    features = geneCodes[i],
    colors_use = viridis_light_high
  ) +
    ggtitle(bquote(~italic(.(genes[i]))))
  print(plot)
  pdf(paste0("./figures/", tissue, "/", tissue, "_", genes[i], "_UMAP.pdf"))
  print(plot)
  dev.off()
}
Look at zonation gene signatures:
# First zonation gene signature (`cv_genes`), printed for inspection.
cv_genes <- c(
  "FETUB", "HMGCS1", "CYP2E1", "GLUD1;GLUD2", "CYP1A2",
  "RGN", "INMT", "COMT", "FDPS", "SPR-1"
)
print(cv_genes)
[1] "FETUB" "HMGCS1" "CYP2E1" "GLUD1;GLUD2" "CYP1A2" "RGN"
[7] "INMT" "COMT" "FDPS" "SPR-1"
# Second zonation gene signature (`pp_genes`), printed for inspection.
pp_genes <- c(
  "Saa2;Saa1-1", "HAMP", "APOA1", "APOC2", "CRYL1",
  "AMY1A;AMY1C;AMY1B;AMY2A;AMY2B", "MT-ATP6", "UROC1",
  "APOA2", "MT-CO3"
)
print(pp_genes)
[1] "Saa2;Saa1-1" "HAMP"
[3] "APOA1" "APOC2"
[5] "CRYL1" "AMY1A;AMY1C;AMY1B;AMY2A;AMY2B"
[7] "MT-ATP6" "UROC1"
[9] "APOA2" "MT-CO3"
Endothelial cells:
# Multi-panel UMAP of endothelial markers (4 columns), saved as one PDF.
# Two alternative feature sets are kept; the second assignment replaces the
# first, so only the second set is plotted.
# One set of features
geneCodes <- c(
  "STAB2", "ITGA1", "LYVE1", "IFITM3;IFITM2;IFITM1",
  "CALCRL", "RAMP2", "BST2", "CLEC4G",
  "CTSV;CTSL", "STAB1", "VWF", "CD34",
  "MECOM", "PLAC9", "RSPO3", "WNT2"
)
# Second set of features
geneCodes <- c(
  "CLEC4G", "STAB1", "STAB2", "LYVE1",
  "CD34", "MECOM", "VWF", "RSPO3",
  "WNT2", "ACKR1"
)
plot <- FeaturePlot_scCustom(
  sobj,
  features = geneCodes,
  colors_use = viridis_light_high,
  num_columns = 4
)
print(plot)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_endothelialMarkers_UMAP.pdf"),
  height = 18,
  width = 24
)
print(plot)
dev.off()
png
2
T lymphocytes
# Multi-panel UMAP of T-lymphocyte markers (4 columns), saved as one PDF.
# The second assignment replaces the first, so only the second set is plotted.
geneCodes <- c(
  "CD3D", "CD3E", "CD4", "CD8A",
  "CTLA4", "IL7R-1", "LEF1", "EOMES",
  "TIGIT", "KLRB1", "KLRD1", "NKG7",
  "XCL1;XCL2", "TOX", "GZMA", "GZMK"
)
geneCodes <- c(
  "CD3D", "CD3E", "GIMAP1", "GIMAP4",
  "GIMAP6", "GIMAP7", "CD4", "CD28",
  "FOXP1", "CTLA4", "CD38", "XCL1;XCL2",
  "IL2RA", "NKG7", "CD8A", "CCL5",
  "CD69", "CD2", "GZMK", "BRCA2",
  "TOP2A", "STMN1"
)
plot <- FeaturePlot_scCustom(
  sobj,
  features = geneCodes,
  colors_use = viridis_light_high,
  num_columns = 4
)
print(plot)
# Page height is 6 per row of four panels.
panel_rows <- ceiling(length(geneCodes) / 4)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_lymphocyteMarkers_UMAP.pdf"),
  height = panel_rows * 6,
  width = 24
)
print(plot)
dev.off()
png
2
T lymphocytes for PBMCs
# Multi-panel UMAP of T-lymphocyte markers for the PBMC map, plotted from the
# "data" slot.
# NOTE(review): the output file name is the same as the general T-lymphocyte
# panel's, so running both chunks for one tissue overwrites the earlier PDF.
geneCodes <- c(
  "CD3D", "CD3E", "XCL1;XCL2", "NKG7",
  "LEF1", "IL7R-1", "GATA3", "MAF",
  "CCR4", "S100A4", "CXCR3", "IL2RA",
  "CD160", "CD247", "BRCA2", "STMN1"
)
# CCR7 and TCF7 aren't showing up???
plot <- FeaturePlot_scCustom(
  sobj,
  features = geneCodes,
  colors_use = viridis_light_high,
  num_columns = 4,
  slot = "data"
)
print(plot)
# Page height is 6 per row of four panels.
panel_rows <- ceiling(length(geneCodes) / 4)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_lymphocyteMarkers_UMAP.pdf"),
  height = panel_rows * 6,
  width = 24
)
print(plot)
dev.off()
png
2
Mesenchyme:
# Multi-panel UMAP of mesenchymal markers (4 columns), saved as one PDF.
# The second assignment replaces the first, so only the second set is plotted.
geneCodes <- c(
  "DCN", "COL1A2", "COL3A1", "HHIP",
  "CALCRL", "SPARC", "RBP1", "COL1A1",
  "IGFBP3", "IGFBP7"
)
# NOTE(review): "RBP1" is listed twice in this set - confirm whether the
# second occurrence was meant to be a different gene.
geneCodes <- c(
  "COL1A2", "COL3A1", "IGFBP7", "IGFBP3",
  "DCN", "CALCRL", "COL1A1", "SPARC",
  "RBP1", "HHIP", "RBP1", "LRAT",
  "PDE3B", "HGF", "CNN2", "ACTA2"
)
plot <- FeaturePlot_scCustom(
  sobj,
  features = geneCodes,
  colors_use = viridis_light_high,
  num_columns = 4
)
print(plot)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_mesenchymalMarkers_UMAP.pdf"),
  height = 24,
  width = 24
)
print(plot)
dev.off()
png
2
Cholangiocytes:
# Multi-panel UMAP of cholangiocyte markers (4 columns), saved as one PDF.
# The second assignment replaces the first, so only the second set is plotted.
geneCodes <- c("KRT19", "SLC4A4", "CFTR", "EPCAM")
geneCodes <- c(
  "ANXA2", "CST3", "BIRC3", "TESC",
  "KRT19", "SOX9", "EPCAM", "SLC4A4",
  "CFTR"
)
plot <- FeaturePlot_scCustom(
  sobj,
  features = geneCodes,
  colors_use = viridis_light_high,
  num_columns = 4
)
print(plot)
# Page height is 6 per row of four panels.
panel_rows <- ceiling(length(geneCodes) / 4)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_cholangiocyteMarkers_UMAP.pdf"),
  height = panel_rows * 6,
  width = 24
)
print(plot)
dev.off()
png
2
Myeloid cells
# Multi-panel UMAP of myeloid markers (4 columns). The recorded output below
# shows that features missing from the object are omitted with a warning.
geneCodes <- c(
  "TYROBP", "CD74", "CTSS", "VSIG4",
  "MARCO", "CD5L", "HMOX1", "VCAM1",
  "ITGAM", "FCGR3A;FCGR3B", "Fcgr3;Fcgr2b", "CTSA",
  "IL17RA-1", "SIRPA;SIRPB1;SIRPG", "BTLA", "CADM1",
  "ID2", "LY6E", "FLT3", "CD14",
  "NOTCH2", "LYZ-1", "S100A8"
)
plot <- FeaturePlot_scCustom(
  sobj,
  features = geneCodes,
  colors_use = viridis_light_high,
  num_columns = 4
)
Warning: The following features were omitted as they were not found:
ℹ MARCO, CD5L, HMOX1, VCAM1, CTSA, IL17RA-1, and LYZ-1
# Show the myeloid marker panel, then save it. The page height is computed
# from the full requested gene list (6 per row of four), including any
# features that were omitted from the plot.
print(plot)
panel_rows <- ceiling(length(geneCodes) / 4)
pdf(
  paste0("./figures/", tissue, "/", tissue, "_myeloidMarkers_UMAP.pdf"),
  height = panel_rows * 6,
  width = 24
)
print(plot)
dev.off()
png
2
Generate heatmaps comparing woodchuck clusters with various datasets
First, set up whichever woodchuck dataset I am working with by reading in the ortholog table, choosing whether the orthologs to use are human, mouse, or woodchuck, and calculating the average expression for each cluster. The output of this section is the scaled cluster gene-expression matrix.
# Grouping used for the averages: the cluster resolution by default.
groups <- res
#groups <- "general_cell_labels"
# Read in ortholog table (tab-separated, with a header row)
ortholog_table_path <- "~/Dropbox/Zoe/scf_version/make_gtf/orthofinder_sc2/homologene/collectedOrthofinderPairings.tsv"
geneNameTable <- read.table(
  ortholog_table_path,
  header = TRUE,
  sep = "\t"
)
# Average the SCT scale.data values within each group.
woodchuckClusterAverages <- AverageExpression(
  sobj,
  group.by = groups,
  assays = "SCT",
  slot = "scale.data"
)
Warning: `invoke()` is deprecated as of rlang 0.4.0.
# Scale the cluster averages gene-wise and rename the rows to the chosen
# species' one-to-one orthologs. The result, `woodchuckClusterAverages`, is a
# data frame ordered by row name.

# Scale data: scale() works on columns, so transpose to scale each gene across
# clusters, transpose back, and drop rows that contain NA.
woodchuckClusterAverages$SCT <- na.omit(
  t(scale(t(as.matrix(woodchuckClusterAverages$SCT))))
)
# Grab gene names from Seurat object
uniqueHier <- data.frame(uniqueHier = row.names(woodchuckClusterAverages$SCT))
# Bind with geneNameTable to get correct order (notice uniqueHier is on left)
newNames <- dplyr::left_join(uniqueHier, geneNameTable, by = "uniqueHier")
# Duplicate keys in the ortholog table would add rows to the join and
# misalign the names with the expression matrix, so fail early and clearly.
stopifnot(nrow(newNames) == nrow(woodchuckClusterAverages$SCT))
# Get orthologs from either mouse or human
species <- "human"
if (species == "human") {
  # If the human one-to-one ortholog is NA, fall back to the uniqueHier name.
  # This is to avoid any potential mistakes in recognizing things it
  # shouldn't be recognizing
  newNames$speciesOneToOne <- ifelse(
    is.na(newNames$humanOneToOne),
    newNames$uniqueHier,
    newNames$humanOneToOne
  )
} else if (species == "mouse") {
  newNames$speciesOneToOne <- ifelse(
    is.na(newNames$mouseOneToOne),
    newNames$uniqueHier,
    newNames$mouseOneToOne
  )
} else if (species == "woodchuck") {
  newNames$speciesOneToOne <- newNames$uniqueHier
} else {
  # Without this, an unknown species leaves speciesOneToOne NULL and the row
  # names below would be dropped without any message.
  stop(
    "species must be \"human\", \"mouse\" or \"woodchuck\", not: ", species,
    call. = FALSE
  )
}
# Grab dataframe
woodchuckClusterAverages <- woodchuckClusterAverages$SCT
# Replace names with one-to-one orthologue of particular species
row.names(woodchuckClusterAverages) <- newNames$speciesOneToOne
# Make sure formatted correctly
woodchuckClusterAverages <- as.data.frame(woodchuckClusterAverages)
# Order by gene name
woodchuckClusterAverages <- woodchuckClusterAverages[
  order(row.names(woodchuckClusterAverages)),
]
# Sanity check
head(woodchuckClusterAverages)
Correlation of woodchuck PBMCs with human 68k PBMC dataset from 10X Genomics
# Sanity check: print the first rows of the reference cluster-average matrix.
# NOTE(review): `allCellsMatrix` is not created anywhere above this point in
# the visible script - presumably it comes from a chunk that is not shown;
# confirm before running top to bottom.
head(allCellsMatrix)
Activated CD8+ Naive CD8+ Memory and Reg T Naive CD4+ NK CD8+ B
42430 -0.7290166 -1.4467216 -0.19073790 -0.4832807 0.02379348 -0.8928406 -0.2258430
42431 -0.2300094 -0.7300300 -0.26479348 -0.7995981 -0.18652940 -0.3474056 -0.6952459
42618 -0.1212209 -0.6309294 -0.09745206 -0.4354971 -0.36419075 -0.8527715 -0.3826776
A4GALT -0.4058024 -1.0280327 -0.68648235 -0.4734361 -0.11159565 -0.5478332 -0.1352675
AATK -1.0033368 -0.9033733 -0.11847519 -1.0810861 -0.10366579 -0.4257702 -0.2369504
ABCA1 -0.6939429 -1.0596759 -0.79368826 -0.8411861 -0.88868386 -0.1144697 0.6312458
Megakaryocytes Monocytes and Dendritic B, Dendritic, T
42430 1.314882 1.50210970 1.1276549
42431 2.578802 0.03087083 0.6439395
42618 2.691420 -0.14763062 0.3409502
A4GALT 2.062829 -0.23671805 1.5623391
AATK 1.958543 1.15513309 0.7589817
ABCA1 1.186970 1.02547753 1.5479533
Correlation of woodchuck liver with human liver dataset from MacParland et al. (2018)
# Human liver reference data. The .RData file presumably provides
# `HumanLiverSeurat` (it is used right after the load).
load("~/Dropbox/Zoe/scf_version/analysis/correlationTests/HumanLiver.RData")
# Bring the object up to date with the installed Seurat version.
HumanLiverSeurat <- UpdateSeuratObject(object = HumanLiverSeurat)
Validating object structure
Updating object slots
Ensuring keys are in the proper structure
Warning: Assay RNA changing from Assay to AssayWarning: DimReduc pca changing from DimReduc to DimReducWarning: DimReduc tsne changing from DimReduc to DimReducEnsuring keys are in the proper structure
Ensuring feature names don't have underscores or pipes
Updating slots in RNA
Updating slots in pca
Updating slots in tsne
Setting tsne DimReduc to global
Validating object structure for Assay ‘RNA’
Validating object structure for DimReduc ‘pca’
Validating object structure for DimReduc ‘tsne’
Object representation is consistent with the most current Seurat version
# Run SCTransform with default arguments; the recorded output below shows the
# SCT assay becoming the default assay afterwards.
HumanLiverSeurat <- SCTransform(HumanLiverSeurat)
Running SCTransform on assay: RNA
vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
Calculating cell attributes from input UMI matrix: log_umi
Total Step 1 genes: 17501
Total overdispersed genes: 14764
Excluding 2737 genes from Step 1 because they are not overdispersed.
Variance stabilizing transformation of count matrix of size 18715 by 8444
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 5000 cells
|
| | 0%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|================== | 25%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|=================================== | 50%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|==================================================== | 75%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|======================================================================| 100%
Setting estimate of 117 genes to inf as theta_mm/theta_mle < 1e-3
# of step1 poisson genes (variance < mean): 0
# of low mean genes (mean < 0.001): 1363
Total # of Step1 poisson genes (theta=Inf; variance < mean): 136
Total # of poisson genes (theta=Inf; variance < mean): 3896
Calling offset model for all 3896 poisson genes
Found 156 outliers - those will be ignored in fitting/regularization step
Ignoring theta inf genes
Replacing fit params for 3896 poisson genes by theta=Inf
Setting min_variance based on median UMI: 0.04
Second step: Get residuals using fitted parameters for 18715 genes
|
| | 0%
|
|== | 3%
|
|==== | 5%
|
|====== | 8%
|
|======= | 11%
|
|========= | 13%
|
|=========== | 16%
|
|============= | 18%
|
|=============== | 21%
|
|================= | 24%
|
|================== | 26%
|
|==================== | 29%
|
|====================== | 32%
|
|======================== | 34%
|
|========================== | 37%
|
|============================ | 39%
|
|============================= | 42%
|
|=============================== | 45%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 55%
|
|========================================= | 58%
|
|========================================== | 61%
|
|============================================ | 63%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================== | 71%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|======================================================= | 79%
|
|========================================================= | 82%
|
|=========================================================== | 84%
|
|============================================================= | 87%
|
|=============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|======================================================================| 100%
Computing corrected count matrix for 18715 genes
|
| | 0%
|
|== | 3%
|
|==== | 5%
|
|====== | 8%
|
|======= | 11%
|
|========= | 13%
|
|=========== | 16%
|
|============= | 18%
|
|=============== | 21%
|
|================= | 24%
|
|================== | 26%
|
|==================== | 29%
|
|====================== | 32%
|
|======================== | 34%
|
|========================== | 37%
|
|============================ | 39%
|
|============================= | 42%
|
|=============================== | 45%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 55%
|
|========================================= | 58%
|
|========================================== | 61%
|
|============================================ | 63%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================== | 71%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|======================================================= | 79%
|
|========================================================= | 82%
|
|=========================================================== | 84%
|
|============================================================= | 87%
|
|=============================================================== | 89%
|
|================================================================ | 92%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|======================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 31.45431 secs
Determine variable features
Centering data matrix
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
Place corrected count matrix in counts slot
Set default assay to SCT
# Per-cluster averages of the SCT scale.data values for the human liver
# object, grouped by its active identities.
humanClusterAverages <- AverageExpression(
  HumanLiverSeurat,
  assays = "SCT",
  slot = "scale.data"
)
# Replace cluster numbers with names (assigned by column position, so this
# relies on the column order returned by AverageExpression()).
human_cluster_names <- c(
  "Hep 1", "Alpha-beta T cells", "Hep 2",
  "Inflammatory macs", "Hep 3", "Hep 4",
  "Plasma cells", "NK-like cells", "Gamma-delta T cells",
  "Non-inflammatory macs", "Periportal LSECs", "Central venous LSECs",
  "Portal endothelial cells", "Hep 5", "Hep 6",
  "Mature B cells", "Cholangiocytes", "Gamma-delta T cells 2",
  "Erythroid cells", "Hepatic stellate cells"
)
colnames(humanClusterAverages$SCT) <- human_cluster_names
# If only looking at specific clusters
#humanClusterAverages$SCT <- humanClusterAverages$SCT[,c("3","1","15","6","14","5")]
# Otherwise go straight to here: scale each gene across clusters and drop
# rows that contain NA.
scaled_human_averages <- t(scale(t(as.matrix(humanClusterAverages$SCT))))
humanClusterAverages$SCT <- na.omit(scaled_human_averages)
# Grab gene names
humanGenes <- row.names(humanClusterAverages$SCT)
# Now turn into large dataframe
allCellsMatrix <- as.data.frame(humanClusterAverages$SCT)
# Order by row name
allCellsMatrix <- allCellsMatrix[order(row.names(allCellsMatrix)), ]
# Sanity check
head(allCellsMatrix)
# Label for the reference dataset that is now loaded.
speciesData <- "macparland"
Correlation of woodchuck liver with human liver dataset from Aizarani et al.
# Load the Aizarani et al. human liver data
aizarani <- readRDS(file = "~/Dropbox/Zoe/scf_version/analysis/correlationTests/GSE124395_Normalhumanliverdata.RData")
# Load the cluster partition table (its row names are matched against the
# cell names of the data below)
aizaraniClusters <- read.table(file = "~/Dropbox/Zoe/scf_version/analysis/correlationTests/GSE124395_clusterpartition.txt")
# Restrict the data to cells that appear in the cluster table
clusteredCells <- intersect(colnames(aizarani), row.names(aizaraniClusters))
aizarani <- aizarani[, clusteredCells]
# Wrap the filtered counts in a Seurat object
aizarani <- CreateSeuratObject(counts = aizarani)
Counts matrix provided is not sparse. Creating V5 assay in Seurat Object.
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
# Normalise the Aizarani object with SCTransform (default settings)
aizarani <- SCTransform(object = aizarani)
Running SCTransform on assay: RNA
Running SCTransform on layer: counts
Using block 2 from counts to learn model.
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Getting residuals for block 1(of 3) for counts dataset
Getting residuals for block 2(of 3) for counts dataset
Getting residuals for block 3(of 3) for counts dataset
Centering data matrix
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
Finished calculating residuals for counts
Set default assay to SCT
# Add cluster IDs.
# Look each cell's cluster up by cell name instead of relying on position: the
# data were subset to the cells shared with the cluster table, so the table is
# not guaranteed to contain the same cells in the same order as the object.
clusterRows <- match(colnames(aizarani), row.names(aizaraniClusters))
stopifnot(!anyNA(clusterRows))
Idents(aizarani) <- aizaraniClusters$sct.cpart[clusterRows]
# Get cluster averages of the scaled SCT expression
aizaraniAverages <- AverageExpression(
  aizarani,
  assays = "SCT",
  slot = "scale.data"
)
# Centre and scale each gene across clusters, then drop gene rows containing NA
aizaraniAverages$SCT <- na.omit(t(scale(t(as.matrix(aizaraniAverages$SCT)))))
# Grab gene names
aizaraniGenes <- row.names(aizaraniAverages$SCT)
# Now turn into large dataframe
allCellsMatrix <- as.data.frame(aizaraniAverages$SCT)
# Order columns by numeric cluster ID.
# NOTE(review): newer Seurat releases may prefix identity names that start
# with a digit (e.g. "g1"); strip any non-digit prefix so the numeric sort
# works with or without it.
clusterNumbers <- as.numeric(sub("^[^0-9]*", "", colnames(allCellsMatrix)))
stopifnot(!anyNA(clusterNumbers))
allCellsMatrix <- allCellsMatrix[, order(clusterNumbers), drop = FALSE]
# Rename columns to be more meaningful (not totally confident I got all correct)
aizaraniLabels <- c(
  'NK, NKT and T cells (1)',
  'Kupffer cells (2)',
  'NK, NKT and T cells (3)',
  'EPCAM+ cells and cholangiocytes (4)',
  'NK, NKT and T cells (5)',
  'Kupffer cells (6)',
  'EPCAM+ cells and cholangiocytes (7)',
  'B cells (8)',
  'Liver sinusoidal endothelial cells (9)',
  'Macrovascular endothelial cells (10)',
  'Hepatocytes (11)',
  'NK, NKT and T cells (12)',
  'Liver sinusoidal endothelial cells (13)',
  'Hepatocytes (14)',
  'Other endothelial cells (15)',
  'Other (16)',
  'Hepatocytes (17)',
  'NK, NKT and T cells (18)',
  'NK, NKT and T cells (19)',
  'Liver sinusoidal endothelial cells (20)',
  'Macrovascular endothelial cells (21)',
  'B cells (22)',
  'Kupffer cells (23)',
  'EPCAM+ cells and cholangiocytes (24)',
  'Kupffer cells (25)',
  'Other endothelial cells (26)',
  'Other (27)',
  'NK, NKT and T cells (28)',
  'Macrovascular endothelial cells (29)',
  'Hepatocytes (30)',
  'Kupffer cells (31)',
  'Macrovascular endothelial cells (32)',
  'Stellate cells and myofibroblasts (33)',
  'B cells (34)',
  'Other endothelial cells (35)',
  'Other (36)',
  'Other (37)',
  'B cells (38)',
  'EPCAM+ cells and cholangiocytes (39)'
)
# The labels are applied by position; fail loudly if the number of clusters
# does not match instead of silently producing NA or shifted column names
stopifnot(ncol(allCellsMatrix) == length(aizaraniLabels))
colnames(allCellsMatrix) <- aizaraniLabels
# Order by row name
allCellsMatrix <- allCellsMatrix[order(row.names(allCellsMatrix)), ]
# Sanity check
head(allCellsMatrix)
# Tag used in the output file names of the correlation plots
speciesData <- "aizarani"
Correlation of woodchuck liver with human liver dataset from Andrews et al. (2022)
# Load the Andrews et al. human liver map
humanNuc <- readRDS(file = "~/Dropbox/Zoe/scf_version/analysis/correlationTests/single_nuc_20_human_map.rds")
# Upgrade the stored object to the structure of the installed Seurat version
humanNuc <- UpdateSeuratObject(object = humanNuc)
Validating object structure
Updating object slots
Ensuring keys are in the proper structure
Warning: Assay RNA changing from Assay to AssayWarning: Graph RNA_nn changing from Graph to GraphWarning: Graph RNA_snn changing from Graph to GraphWarning: DimReduc pca changing from DimReduc to DimReducWarning: DimReduc tsne changing from DimReduc to DimReducWarning: DimReduc umap changing from DimReduc to DimReducWarning: DimReduc harmony changing from DimReduc to DimReducEnsuring keys are in the proper structure
Ensuring feature names don't have underscores or pipes
Updating slots in RNA
Updating slots in RNA_nn
Setting default assay of RNA_nn to RNA
Updating slots in RNA_snn
Setting default assay of RNA_snn to RNA
Updating slots in pca
Updating slots in tsne
Setting tsne DimReduc to global
Updating slots in umap
Setting umap DimReduc to global
Updating slots in harmony
Setting assay used for RunPCA.RNA to RNA
Setting assay used for RunUMAP.RNA.pca to RNA
Setting assay used for Seurat..ProjectDim.RNA.harmony to RNA
Setting assay used for RunUMAP.RNA.harmony to RNA
No assay information could be found for RunTSNE
Warning: Adding a command log without an assay associated with itSetting assay used for FindNeighbors.RNA.harmony to RNA
No assay information could be found for FindClusters
Warning: Adding a command log without an assay associated with itValidating object structure for Assay ‘RNA’
Validating object structure for Graph ‘RNA_nn’
Validating object structure for Graph ‘RNA_snn’
Validating object structure for DimReduc ‘pca’
Validating object structure for DimReduc ‘tsne’
Validating object structure for DimReduc ‘umap’
Validating object structure for DimReduc ‘harmony’
Object representation is consistent with the most current Seurat version
# Keep only the cells whose assay_type metadata is "single_nuc"
humanNuc <- subset(x = humanNuc, subset = assay_type == "single_nuc")
# Normalise the remaining cells with SCTransform (default settings)
humanNuc <- SCTransform(object = humanNuc)
Running SCTransform on assay: RNA
vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
Calculating cell attributes from input UMI matrix: log_umi
Total Step 1 genes: 10432
Total overdispersed genes: 10429
Excluding 3 genes from Step 1 because they are not overdispersed.
Variance stabilizing transformation of count matrix of size 10432 by 43863
Model formula is y ~ log_umi
Get Negative Binomial regression parameters per gene
Using 2000 genes, 5000 cells
|
| | 0%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|================== | 25%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|=================================== | 50%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|==================================================== | 75%
Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = TRUE.
|
|======================================================================| 100%
Setting estimate of 97 genes to inf as theta_mm/theta_mle < 1e-3
# of step1 poisson genes (variance < mean): 0
# of low mean genes (mean < 0.001): 0
Total # of Step1 poisson genes (theta=Inf; variance < mean): 97
Total # of poisson genes (theta=Inf; variance < mean): 3
Calling offset model for all 3 poisson genes
Found 179 outliers - those will be ignored in fitting/regularization step
Ignoring theta inf genes
Replacing fit params for 3 poisson genes by theta=Inf
Setting min_variance based on median UMI: 0.04
Second step: Get residuals using fitted parameters for 10432 genes
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
Computing corrected count matrix for 10432 genes
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
Calculating gene attributes
Wall clock passed: Time difference of 1.40099 mins
Determine variable features
Centering data matrix
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
Place corrected count matrix in counts slot
Set default assay to SCT
# Use the sub_annotation metadata column as the active identity
Idents(humanNuc) <- humanNuc@meta.data$sub_annotation
# Per-identity average of the scaled SCT expression
humanNucClustAverages <- AverageExpression(
  humanNuc,
  assays = "SCT",
  slot = "scale.data"
)
# Centre and scale each gene across identities (scale() works column-wise,
# hence the double transpose), then drop any gene row that contains an NA
humanNucClustAverages$SCT <- as.matrix(humanNucClustAverages$SCT)
humanNucClustAverages$SCT <- t(scale(t(humanNucClustAverages$SCT)))
humanNucClustAverages$SCT <- na.omit(humanNucClustAverages$SCT)
# Genes that survived the filtering
nucGenes <- row.names(humanNucClustAverages$SCT)
# Data frame of genes x identities, sorted alphabetically by gene name
allCellsMatrix <- as.data.frame(humanNucClustAverages$SCT)
geneOrder <- order(row.names(allCellsMatrix))
allCellsMatrix <- allCellsMatrix[geneOrder, ]
# Eyeball the first rows
head(allCellsMatrix)
# Tag used in the output file names of the correlation plots
speciesData <- "andrews"
Correlation of woodchuck liver with woodchuck PBMCs. For this correlation, first read in the woodchuck liver dataset at the beginning of this script, then read in the woodchuck PBMCs below.
# Metadata column that defines the PBMC groups to average over
groups <- "general_cell_labels"
# With the liver object already processed, load the woodchuck PBMC data again;
# the .RData file is expected to supply `scSeurat`
load("~/Dropbox/Zoe/scf_version/analysis/healthy_sc/seurat_objects/no_dropletQC/integrated_PBMC_cca_kanchor5_scClustViz.RData")
Idents(scSeurat) <- "integrated_snn_res.0.6"
# Per-group average of the scaled SCT expression
pbmcClusterAverages <- AverageExpression(
  scSeurat,
  assays = "SCT",
  slot = "scale.data",
  group.by = groups
)
# Centre and scale each gene across groups, drop gene rows containing NA, and
# store the result as a data frame
pbmcScaled <- t(scale(t(as.matrix(pbmcClusterAverages$SCT))))
pbmcClusterAverages <- as.data.frame(na.omit(pbmcScaled))
# Sort alphabetically by gene name
pbmcGeneOrder <- order(row.names(pbmcClusterAverages))
allCellsMatrix <- pbmcClusterAverages[pbmcGeneOrder, ]
# Tag used in the output file names of the correlation plots
speciesData <- "PBMC"
# Genes present in both the comparison dataset and the woodchuck averages
comparisonGenes <- row.names(allCellsMatrix)
woodchuckGenes <- row.names(woodchuckClusterAverages)
matches <- intersect(comparisonGenes, woodchuckGenes)
# Report how many genes are shared
length(matches)
[1] 707
# Restrict both tables to the shared genes, in the same row order
toCor <- allCellsMatrix[matches, ]
woodchuckAveragesCor <- woodchuckClusterAverages[matches, ]
# Pearson correlation between the columns of the two tables
pearVal <- cor(x = toCor, y = woodchuckAveragesCor, method = "pearson")
# Quick on-screen heatmap; optional arguments kept for reference:
heatmap(pearVal)
#main = paste("Pearson correlation of", speciesData, "vs", woodchuckData),
#xlab = woodchuckData,
#ylab = speciesData)
#margins = c(6,11))
#Rowv = NA,
#Colv = NA)
# Write the heatmap with wider margins to a PDF under ./figures/<tissue>/
pearsonPdf <- paste0("./figures/", tissue, "/", tissue, "_", speciesData, "_PearsonCor.pdf")
pdf(pearsonPdf, height = 15, width = 13)
heatmap(pearVal, margins = c(13, 13))
dev.off()
png
2
# Spearman (rank) correlation between the columns of the two tables
spearVal <- cor(x = toCor, y = woodchuckAveragesCor, method = "spearman")
# Quick on-screen heatmap; optional arguments kept for reference:
heatmap(spearVal)
#main = paste("Spearman correlation of", speciesData, "vs", woodchuckData),
#xlab = woodchuckData,
#ylab = speciesData)
#margins = c(6,11))
#Rowv = NA,
#Colv = NA)
# Write the heatmap with wider margins to a PDF under ./figures/<tissue>/
spearmanPdf <- paste0("./figures/", tissue, "/", tissue, "_", speciesData, "_SpearmanCor.pdf")
pdf(spearmanPdf, height = 15, width = 13)
heatmap(spearVal, margins = c(13, 13))
dev.off()
png
2